We plot the data and can see that there is no obvious large difference between the debt levels or scenarios.
likert.data <- d.both_completed %>%
select(high_debt_version, quality_pre_task)
likert.data$quality_pre_task <- revalue(likert.data$quality_pre_task, c(
"-3"="Very Bad",
"-2"="Bad",
"-1"="Somewhat Bad",
"0"="Neutral",
"1"="Somewhat Good",
"2"="Good",
"3"="Very Good"
))
likert.data$high_debt_version <- revalue(likert.data$high_debt_version, c(
"true"="High Debt",
"false"="Low Debt"
))
ggplot(likert.data, aes(x=quality_pre_task)) +
geom_bar(fill= "Light Blue") +
facet_grid(rows = vars(high_debt_version)) +
scale_y_continuous(limits = NULL, breaks = c(2,4,6,8), labels = c("2","4","6","8")) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank())
likert.data <- d.both_completed %>%
select(scenario, quality_pre_task)
likert.data$quality_pre_task <- revalue(likert.data$quality_pre_task, c(
"-3"="Very Bad",
"-2"="Bad",
"-1"="Somewhat Bad",
"0"="Neutral",
"1"="Somewhat Good",
"2"="Good",
"3"="Very Good"
))
ggplot(likert.data, aes(x=quality_pre_task)) +
geom_bar(fill= "Light Blue") +
facet_grid(rows = vars(scenario)) +
scale_y_continuous(limits = NULL, breaks = c(2,4,6,8), labels = c("2","4","6","8")) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank())
As the data is collected from a likert scale we will use a cumulative family, indicating that each level on the scale is an incremental step. This model is also able to fit the data well.
We include high_debt_verison as a predictor in our model as this variable represent the very effect we want to measure. We also include a varying intercept for each individual to prevent the model from learning too much from single participants with extreme measurements.
We iterate over the model until we have sane priors.
scenario_quality.with <- extendable_model(
base_name = "scenario_quality",
base_formula = "quality_pre_task ~ 1 + high_debt_version + (1 | session)",
base_priors = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = d.both_completed,
)
prior_summary(scenario_quality.with(only_priors= TRUE))
prior_summary(scenario_quality.with(sample_prior = "only"))
pp_check(scenario_quality.with(sample_prior = "only"), nsamples = 200, type = "bars")
We choose a beta parameter priors allowing for the beta parameter to account for 100% of the effect but that is skeptical to such strong effects from the beta parameter.
sim.size <- 1000
sim.intercept <- rnorm(sim.size, 0, 2)
sim.beta <- rnorm(sim.size, 0, 1)
sim.beta.diff <- (plogis(sim.intercept + sim.beta) / plogis(sim.intercept) * 100) - 100
data.frame(x = sim.beta.diff) %>%
ggplot(aes(x)) +
geom_density() +
xlim(-150, 150) +
labs(
title = "Beta parameter prior influence",
x = "Estimate with beta as % of estimate without beta",
y = "Density"
)
We check the posterior distribution and can see that the model seems to have been able to fit the data well. Sampling seems to also have worked well as Rhat values are close to 1 and the sampling plots look nice.
pp_check(scenario_quality.with(), nsamples = 200, type = "bars")
summary(scenario_quality.with())
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(data) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.37 0.31 0.01 1.12 1.00 1725 1962
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -1.91 0.56 -3.10 -0.89 1.00 4069
## Intercept[2] -0.56 0.43 -1.41 0.24 1.00 6286
## Intercept[3] 0.22 0.42 -0.60 1.04 1.00 5930
## Intercept[4] 0.62 0.43 -0.22 1.45 1.00 5586
## Intercept[5] 1.92 0.49 1.01 2.93 1.00 4599
## Intercept[6] 3.95 0.72 2.67 5.47 1.00 5445
## high_debt_versionfalse 1.38 0.51 0.38 2.43 1.00 5767
## Tail_ESS
## Intercept[1] 2277
## Intercept[2] 3301
## Intercept[3] 3316
## Intercept[4] 3488
## Intercept[5] 3326
## Intercept[6] 3356
## high_debt_versionfalse 3061
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(scenario_quality.with(), ask = FALSE)
# default prior for monotonic predictor
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
We use loo to check some possible extensions on the model.
loo_result <- loo(
# Benchmark model(s)
scenario_quality.with(),
# New model(s)
scenario_quality.with("work_domain"),
scenario_quality.with("work_experience_programming.s"),
scenario_quality.with("work_experience_java.s"),
scenario_quality.with("education_field"),
scenario_quality.with("mo(education_level)", edlvl_prior),
scenario_quality.with("workplace_peer_review"),
scenario_quality.with("workplace_td_tracking"),
scenario_quality.with("workplace_pair_programming"),
scenario_quality.with("workplace_coding_standards"),
scenario_quality.with("scenario"),
scenario_quality.with("group")
)
loo_result[2]
## $diffs
## elpd_diff se_diff
## scenario_quality.with("workplace_coding_standards") 0.0 0.0
## scenario_quality.with("work_experience_programming.s") -0.1 1.7
## scenario_quality.with("workplace_peer_review") -0.5 1.2
## scenario_quality.with("work_experience_java.s") -0.7 1.8
## scenario_quality.with("mo(education_level)", edlvl_prior) -1.1 1.3
## scenario_quality.with() -1.3 1.4
## scenario_quality.with("workplace_pair_programming") -1.5 1.4
## scenario_quality.with("workplace_td_tracking") -1.7 1.4
## scenario_quality.with("scenario") -1.8 1.4
## scenario_quality.with("education_field") -1.9 1.6
## scenario_quality.with("group") -2.0 1.2
## scenario_quality.with("work_domain") -2.6 1.6
loo_result[1]
## $loos
## $loos$`scenario_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.5 4.1
## p_loo 8.7 0.9
## looic 163.1 8.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1343
## (0.5, 0.7] (ok) 2 4.5% 1889
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_domain")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.8 4.4
## p_loo 12.7 1.5
## looic 165.6 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 904
## (0.5, 0.7] (ok) 1 2.3% 1885
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.3 4.2
## p_loo 9.1 0.8
## looic 160.6 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 2178
## (0.5, 0.7] (ok) 1 2.3% 3852
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.9 4.2
## p_loo 9.5 1.0
## looic 161.7 8.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 1609
## (0.5, 0.7] (ok) 3 6.8% 1104
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.1 4.0
## p_loo 9.8 1.0
## looic 164.1 8.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.3 4.3
## p_loo 9.4 1.0
## looic 162.6 8.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1700
## (0.5, 0.7] (ok) 1 2.3% 3371
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 8.9 0.9
## looic 161.3 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1909
## (0.5, 0.7] (ok) 2 4.5% 1753
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.9 4.1
## p_loo 9.5 1.0
## looic 163.8 8.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1282
## (0.5, 0.7] (ok) 1 2.3% 3824
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_pair_programming")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.7 4.2
## p_loo 9.3 1.0
## looic 163.4 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1681
## (0.5, 0.7] (ok) 5 11.4% 1881
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.2
## p_loo 8.7 0.9
## looic 160.4 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 2088
## (0.5, 0.7] (ok) 2 4.5% 4833
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("scenario")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.0 4.1
## p_loo 9.4 1.0
## looic 164.0 8.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1256
## (0.5, 0.7] (ok) 2 4.5% 2518
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("group")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.3 4.4
## p_loo 10.5 1.1
## looic 164.5 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
loo_result <- loo(
# Benchmark model(s)
scenario_quality.with(),
scenario_quality.with("work_experience_programming.s"),
scenario_quality.with("workplace_coding_standards"),
scenario_quality.with("workplace_peer_review"),
scenario_quality.with("work_experience_java.s"),
# New model(s)
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")),
scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")),
scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s")),
scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review")),
scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")),
scenario_quality.with(c("workplace_peer_review", "work_experience_java.s"))
)
loo_result[2]
## $diffs
## elpd_diff
## scenario_quality.with("workplace_coding_standards") 0.0
## scenario_quality.with("work_experience_programming.s") -0.1
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) -0.1
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) -0.4
## scenario_quality.with("workplace_peer_review") -0.5
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) -0.5
## scenario_quality.with("work_experience_java.s") -0.7
## scenario_quality.with(c("workplace_peer_review", "work_experience_java.s")) -0.7
## scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review")) -0.7
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s")) -0.9
## scenario_quality.with() -1.3
## se_diff
## scenario_quality.with("workplace_coding_standards") 0.0
## scenario_quality.with("work_experience_programming.s") 1.7
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) 1.3
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) 1.3
## scenario_quality.with("workplace_peer_review") 1.2
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) 1.6
## scenario_quality.with("work_experience_java.s") 1.8
## scenario_quality.with(c("workplace_peer_review", "work_experience_java.s")) 1.6
## scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review")) 0.6
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s")) 1.8
## scenario_quality.with() 1.4
loo_result[1]
## $loos
## $loos$`scenario_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.5 4.1
## p_loo 8.7 0.9
## looic 163.1 8.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1343
## (0.5, 0.7] (ok) 2 4.5% 1889
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.3 4.2
## p_loo 9.1 0.8
## looic 160.6 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 2178
## (0.5, 0.7] (ok) 1 2.3% 3852
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.2
## p_loo 8.7 0.9
## looic 160.4 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 2088
## (0.5, 0.7] (ok) 2 4.5% 4833
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 8.9 0.9
## looic 161.3 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1909
## (0.5, 0.7] (ok) 2 4.5% 1753
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.9 4.2
## p_loo 9.5 1.0
## looic 161.7 8.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 1609
## (0.5, 0.7] (ok) 3 6.8% 1104
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.4 4.3
## p_loo 9.8 0.9
## looic 160.7 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.4
## p_loo 10.0 1.0
## looic 161.4 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1693
## (0.5, 0.7] (ok) 5 11.4% 1404
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.1 4.2
## p_loo 9.9 1.0
## looic 162.2 8.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 844
## (0.5, 0.7] (ok) 2 4.5% 1412
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.9 4.3
## p_loo 9.6 1.0
## looic 161.8 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 9.9 1.0
## looic 161.3 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1584
## (0.5, 0.7] (ok) 1 2.3% 4904
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_peer_review", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.9 4.3
## p_loo 9.9 1.0
## looic 161.7 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1647
## (0.5, 0.7] (ok) 1 2.3% 4287
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
loo_result <- loo(
# Benchmark model(s)
scenario_quality.with(),
scenario_quality.with("work_experience_programming.s"),
scenario_quality.with("workplace_coding_standards"),
scenario_quality.with("workplace_peer_review"),
scenario_quality.with("work_experience_java.s"),
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")),
scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")),
scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")),
# New model(s)
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review")),
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s")),
scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review")),
scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review"))
)
loo_result[2]
## $diffs
## elpd_diff
## scenario_quality.with("workplace_coding_standards") 0.0
## scenario_quality.with("work_experience_programming.s") -0.1
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) -0.1
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) -0.4
## scenario_quality.with("workplace_peer_review") -0.5
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) -0.5
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review")) -0.6
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s")) -0.6
## scenario_quality.with("work_experience_java.s") -0.7
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review")) -0.8
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review")) -1.0
## scenario_quality.with() -1.3
## se_diff
## scenario_quality.with("workplace_coding_standards") 0.0
## scenario_quality.with("work_experience_programming.s") 1.7
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) 1.3
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) 1.3
## scenario_quality.with("workplace_peer_review") 1.2
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) 1.6
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review")) 1.4
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s")) 1.4
## scenario_quality.with("work_experience_java.s") 1.8
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review")) 1.7
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review")) 1.3
## scenario_quality.with() 1.4
loo_result[1]
## $loos
## $loos$`scenario_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.5 4.1
## p_loo 8.7 0.9
## looic 163.1 8.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1343
## (0.5, 0.7] (ok) 2 4.5% 1889
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.3 4.2
## p_loo 9.1 0.8
## looic 160.6 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 2178
## (0.5, 0.7] (ok) 1 2.3% 3852
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.2
## p_loo 8.7 0.9
## looic 160.4 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 2088
## (0.5, 0.7] (ok) 2 4.5% 4833
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 8.9 0.9
## looic 161.3 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1909
## (0.5, 0.7] (ok) 2 4.5% 1753
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.9 4.2
## p_loo 9.5 1.0
## looic 161.7 8.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 1609
## (0.5, 0.7] (ok) 3 6.8% 1104
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.4 4.3
## p_loo 9.8 0.9
## looic 160.7 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.4
## p_loo 10.0 1.0
## looic 161.4 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1693
## (0.5, 0.7] (ok) 5 11.4% 1404
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 9.9 1.0
## looic 161.3 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1584
## (0.5, 0.7] (ok) 1 2.3% 4904
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.8 4.4
## p_loo 10.3 1.1
## looic 161.6 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1465
## (0.5, 0.7] (ok) 2 4.5% 1274
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.9 4.3
## p_loo 10.3 1.0
## looic 161.7 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1493
## (0.5, 0.7] (ok) 2 4.5% 682
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.0 4.4
## p_loo 10.3 1.0
## looic 162.1 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.2 4.4
## p_loo 10.6 1.1
## looic 162.5 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1676
## (0.5, 0.7] (ok) 1 2.3% 886
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
We pick some of our top performing models as candidates and inspect them closer.
The candidate models are named and listed in order of complexity.
We select the simplest model as a baseline.
scenario_quality0 <- brm(
"quality_pre_task ~ 1 + high_debt_version + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality0",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality0)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.37 0.31 0.01 1.12 1.00 1725 1962
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -1.91 0.56 -3.10 -0.89 1.00 4069
## Intercept[2] -0.56 0.43 -1.41 0.24 1.00 6286
## Intercept[3] 0.22 0.42 -0.60 1.04 1.00 5930
## Intercept[4] 0.62 0.43 -0.22 1.45 1.00 5586
## Intercept[5] 1.92 0.49 1.01 2.93 1.00 4599
## Intercept[6] 3.95 0.72 2.67 5.47 1.00 5445
## high_debt_versionfalse 1.38 0.51 0.38 2.43 1.00 5767
## Tail_ESS
## Intercept[1] 2277
## Intercept[2] 3301
## Intercept[3] 3316
## Intercept[4] 3488
## Intercept[5] 3326
## Intercept[6] 3356
## high_debt_versionfalse 3061
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality0)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.148757336 0.4274483 -1.2722152 0.5444956
## 6033d90a5af2c702367b3a96 0.005416289 0.4128986 -0.9267926 0.9406170
## 6034fc165af2c702367b3a98 -0.014267531 0.3968438 -0.9332963 0.8184115
## 603500725af2c702367b3a99 0.279008390 0.5682792 -0.4151116 1.8818775
## 603f97625af2c702367b3a9d -0.020459904 0.3893067 -0.9277874 0.8107566
## 603fd5d95af2c702367b3a9e 0.029535228 0.4150157 -0.8685716 1.0203800
## 60409b7b5af2c702367b3a9f -0.028281183 0.3871566 -0.9065058 0.8081764
## 604b82b5a7718fbed181b336 0.227368475 0.5024917 -0.5079340 1.5935208
## 6050c1bf856f36729d2e5218 -0.102620034 0.4165942 -1.1489405 0.6671387
## 6050e1e7856f36729d2e5219 0.013428652 0.4441066 -0.9188757 1.0198018
## 6055fdc6856f36729d2e521b 0.016536197 0.4010119 -0.8604421 0.9476857
## 60589862856f36729d2e521f -0.141239505 0.4459134 -1.3612020 0.5931267
## 605afa3a856f36729d2e5222 -0.017106577 0.4028785 -0.9615412 0.8298204
## 605c8bc6856f36729d2e5223 -0.154006262 0.4455790 -1.3561902 0.5565008
## 605f3f2d856f36729d2e5224 -0.148160849 0.4553474 -1.3879750 0.5889337
## 605f46c3856f36729d2e5225 0.073108403 0.3978411 -0.7051325 1.0218740
## 60605337856f36729d2e5226 -0.076285734 0.4048755 -1.0535310 0.7296984
## 60609ae6856f36729d2e5228 0.010919700 0.4109532 -0.8713713 0.9744957
## 6061ce91856f36729d2e522e 0.006728872 0.4241197 -0.9133587 0.9284082
## 6061f106856f36729d2e5231 0.232452438 0.5249136 -0.4757181 1.6752953
## 6068ea9f856f36729d2e523e -0.038388053 0.4362776 -1.0367480 0.8615274
## 6075ab05856f36729d2e5247 0.068679481 0.3910206 -0.6792862 1.0076620
plot(scenario_quality0, ask = FALSE)
pp_check(scenario_quality0, nsamples = 200, type = "bars")
We select the best performing model with one variable.
scenario_quality1 <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality1",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality1)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.34 0.28 0.01 1.03 1.00 2324 1940
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -1.96 0.53 -3.07 -0.97 1.00
## Intercept[2] -0.54 0.42 -1.40 0.26 1.00
## Intercept[3] 0.25 0.42 -0.54 1.06 1.00
## Intercept[4] 0.65 0.42 -0.17 1.49 1.00
## Intercept[5] 1.98 0.50 1.03 3.00 1.00
## Intercept[6] 4.07 0.73 2.75 5.58 1.00
## high_debt_versionfalse 1.44 0.51 0.48 2.46 1.00
## work_experience_programming.s -0.55 0.29 -1.13 0.01 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 4333 3131
## Intercept[2] 6311 3171
## Intercept[3] 5115 3345
## Intercept[4] 5026 3480
## Intercept[5] 5482 3648
## Intercept[6] 6200 3138
## high_debt_versionfalse 6083 3113
## work_experience_programming.s 6015 2205
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality1)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.149441541 0.4108734 -1.2418117 0.5100182
## 6033d90a5af2c702367b3a96 -0.018268131 0.3916576 -0.9525293 0.8393857
## 6034fc165af2c702367b3a98 -0.039695747 0.3648741 -0.8945553 0.7079969
## 603500725af2c702367b3a99 0.222894040 0.5040741 -0.4453012 1.6231085
## 603f97625af2c702367b3a9d -0.045715269 0.3831279 -0.9582082 0.7430469
## 603fd5d95af2c702367b3a9e 0.013437703 0.3960254 -0.8277834 0.8980220
## 60409b7b5af2c702367b3a9f -0.059617978 0.3833850 -0.9965317 0.6834560
## 604b82b5a7718fbed181b336 0.187286501 0.4445182 -0.4525978 1.3565720
## 6050c1bf856f36729d2e5218 -0.093343894 0.3842176 -1.0743663 0.5947527
## 6050e1e7856f36729d2e5219 0.013442184 0.4259396 -0.8647352 0.9520609
## 6055fdc6856f36729d2e521b -0.018211354 0.3856922 -0.8591817 0.7911348
## 60589862856f36729d2e521f -0.033836423 0.3960021 -0.9377979 0.8083069
## 605afa3a856f36729d2e5222 0.090437038 0.4049246 -0.6479131 1.1065945
## 605c8bc6856f36729d2e5223 -0.118587559 0.4273448 -1.2033475 0.6322399
## 605f3f2d856f36729d2e5224 0.002794025 0.3982304 -0.8620396 0.8723002
## 605f46c3856f36729d2e5225 0.032883889 0.3821444 -0.7726869 0.9071935
## 60605337856f36729d2e5226 -0.088962864 0.3864880 -1.0796080 0.6321384
## 60609ae6856f36729d2e5228 -0.021481624 0.3928880 -0.9118954 0.8278850
## 6061ce91856f36729d2e522e -0.014614745 0.3839837 -0.8671561 0.8044789
## 6061f106856f36729d2e5231 0.183811317 0.4550526 -0.4890935 1.3732763
## 6068ea9f856f36729d2e523e -0.050618168 0.4062034 -1.0092622 0.7521737
## 6075ab05856f36729d2e5247 0.037184902 0.3651405 -0.7538955 0.8541869
plot(scenario_quality1, ask = FALSE)
pp_check(scenario_quality1, nsamples = 200, type = "bars")
We select the best performing model with two variables.
scenario_quality2 <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality2",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality2)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.34 0.27 0.01 1.01 1.00 1923 1491
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -1.74 0.60 -2.96 -0.59 1.00
## Intercept[2] -0.31 0.48 -1.28 0.62 1.00
## Intercept[3] 0.50 0.50 -0.46 1.47 1.00
## Intercept[4] 0.91 0.50 -0.06 1.90 1.00
## Intercept[5] 2.25 0.55 1.21 3.38 1.00
## Intercept[6] 4.40 0.79 2.94 6.03 1.00
## high_debt_versionfalse 1.46 0.51 0.47 2.48 1.00
## work_experience_programming.s -0.45 0.31 -1.09 0.18 1.00
## workplace_coding_standardsfalse 0.56 0.52 -0.46 1.58 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5150 3203
## Intercept[2] 5709 3271
## Intercept[3] 4948 3411
## Intercept[4] 4868 3207
## Intercept[5] 5024 3289
## Intercept[6] 6061 3294
## high_debt_versionfalse 6933 2884
## work_experience_programming.s 6009 2927
## workplace_coding_standardsfalse 6035 2935
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality2)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.180458351 0.4270973 -1.3037527 0.4825258
## 6033d90a5af2c702367b3a96 0.008479520 0.3696732 -0.7489860 0.8353923
## 6034fc165af2c702367b3a98 -0.075493730 0.3828212 -1.0380275 0.6354802
## 603500725af2c702367b3a99 0.219477848 0.4898003 -0.4176519 1.6338953
## 603f97625af2c702367b3a9d -0.020540678 0.3586060 -0.8600345 0.7473146
## 603fd5d95af2c702367b3a9e 0.005121436 0.3959086 -0.8334762 0.9060175
## 60409b7b5af2c702367b3a9f -0.088612775 0.3820658 -1.0323360 0.6088298
## 604b82b5a7718fbed181b336 0.173096360 0.4463693 -0.4850745 1.3522493
## 6050c1bf856f36729d2e5218 -0.050797783 0.3682345 -0.9191268 0.6860745
## 6050e1e7856f36729d2e5219 -0.008389102 0.4117713 -0.9474704 0.8922677
## 6055fdc6856f36729d2e521b 0.015593136 0.3765757 -0.8050635 0.8829831
## 60589862856f36729d2e521f -0.037135159 0.4024745 -0.9702505 0.7698183
## 605afa3a856f36729d2e5222 0.092127928 0.4091607 -0.6931762 1.0993323
## 605c8bc6856f36729d2e5223 -0.111850979 0.4191050 -1.1672070 0.5851808
## 605f3f2d856f36729d2e5224 -0.006112181 0.4098042 -0.8736870 0.8929565
## 605f46c3856f36729d2e5225 0.061665421 0.3887107 -0.7043392 0.9682669
## 60605337856f36729d2e5226 -0.064245680 0.3914460 -0.9801392 0.6606860
## 60609ae6856f36729d2e5228 -0.041074052 0.3916796 -0.9402093 0.7871625
## 6061ce91856f36729d2e522e 0.010327374 0.3925069 -0.8524621 0.8578924
## 6061f106856f36729d2e5231 0.163677977 0.4277211 -0.4879881 1.2978450
## 6068ea9f856f36729d2e523e -0.078619181 0.4066479 -1.0201277 0.6808658
## 6075ab05856f36729d2e5247 0.021491995 0.3991338 -0.8014571 0.9276029
plot(scenario_quality2, ask = FALSE)
pp_check(scenario_quality2, nsamples = 200, type = "bars")
We select the best performing model with three variables.
scenario_quality3 <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + work_experience_java.s + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality3",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality3)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + work_experience_java.s + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.35 0.29 0.01 1.03 1.00 2291 2204
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -1.74 0.60 -3.03 -0.65 1.00
## Intercept[2] -0.30 0.48 -1.26 0.60 1.00
## Intercept[3] 0.51 0.48 -0.42 1.45 1.00
## Intercept[4] 0.92 0.48 -0.01 1.86 1.00
## Intercept[5] 2.26 0.56 1.20 3.38 1.00
## Intercept[6] 4.39 0.79 2.98 6.04 1.00
## high_debt_versionfalse 1.46 0.51 0.47 2.50 1.00
## work_experience_programming.s -0.37 0.63 -1.61 0.87 1.00
## workplace_coding_standardsfalse 0.55 0.52 -0.49 1.59 1.00
## work_experience_java.s -0.09 0.62 -1.34 1.15 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5430 3522
## Intercept[2] 6206 3657
## Intercept[3] 5830 3594
## Intercept[4] 5227 3710
## Intercept[5] 5502 3152
## Intercept[6] 6347 3456
## high_debt_versionfalse 6797 2739
## work_experience_programming.s 3580 2697
## workplace_coding_standardsfalse 6019 3114
## work_experience_java.s 3618 3222
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality3)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.181816280 0.4405628 -1.3184627 0.5029155
## 6033d90a5af2c702367b3a96 0.012885467 0.4084165 -0.8527901 0.9162256
## 6034fc165af2c702367b3a98 -0.077128818 0.3928361 -1.0338050 0.6318645
## 603500725af2c702367b3a99 0.223428856 0.5001043 -0.4735747 1.6129773
## 603f97625af2c702367b3a9d -0.016336991 0.3942060 -0.9149026 0.8334572
## 603fd5d95af2c702367b3a9e 0.007157820 0.4067837 -0.8977912 0.9522330
## 60409b7b5af2c702367b3a9f -0.093887391 0.4058748 -1.0898480 0.6186677
## 604b82b5a7718fbed181b336 0.186859017 0.4575911 -0.4647068 1.4106640
## 6050c1bf856f36729d2e5218 -0.070806731 0.3952084 -1.0623733 0.6944021
## 6050e1e7856f36729d2e5219 0.004996972 0.4147346 -0.8998335 0.9222867
## 6055fdc6856f36729d2e521b 0.008621805 0.3792548 -0.8077176 0.8580448
## 60589862856f36729d2e521f -0.036330945 0.3987656 -0.9807460 0.7846111
## 605afa3a856f36729d2e5222 0.109041052 0.4176434 -0.6359509 1.1738600
## 605c8bc6856f36729d2e5223 -0.104286003 0.4165126 -1.1368428 0.6021431
## 605f3f2d856f36729d2e5224 -0.004225356 0.4349277 -1.0004760 0.9755948
## 605f46c3856f36729d2e5225 0.076963645 0.3992299 -0.6815372 1.0371310
## 60605337856f36729d2e5226 -0.064664560 0.3894382 -0.9792148 0.6923187
## 60609ae6856f36729d2e5228 -0.044514846 0.3941520 -0.9885860 0.7684634
## 6061ce91856f36729d2e522e 0.006988554 0.3960568 -0.8481781 0.8806837
## 6061f106856f36729d2e5231 0.171452012 0.4655336 -0.5723152 1.3919538
## 6068ea9f856f36729d2e523e -0.069521401 0.3855070 -0.9682157 0.6737380
## 6075ab05856f36729d2e5247 0.029378220 0.3772964 -0.7610164 0.9193851
plot(scenario_quality3, ask = FALSE)
pp_check(scenario_quality3, nsamples = 200, type = "bars")
All candidate models look nice, none is significantly better than the others, we will proceed the model containing work experince as it otherwise ourd be added in the next step: scenario_quality1
Some participants did only complete one scenario. Those has been excluded from the initial dataset to improve sampling of the models. We do however want to use all data we can and will therefore try to fit the model with the complete dataset.
scenario_quality1.all <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.completed),
file = "fits/scenario_quality1.all",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality1.all)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.32 0.26 0.01 0.95 1.00 2038 2168
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -2.07 0.56 -3.25 -1.09 1.00
## Intercept[2] -0.58 0.40 -1.40 0.19 1.00
## Intercept[3] 0.30 0.39 -0.45 1.08 1.00
## Intercept[4] 0.82 0.41 0.02 1.61 1.00
## Intercept[5] 2.07 0.47 1.22 3.03 1.00
## Intercept[6] 4.20 0.73 2.87 5.73 1.00
## high_debt_versionfalse 1.41 0.47 0.51 2.35 1.00
## work_experience_programming.s -0.52 0.28 -1.09 0.01 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5706 3125
## Intercept[2] 7099 3356
## Intercept[3] 5807 3189
## Intercept[4] 5720 3664
## Intercept[5] 6039 3338
## Intercept[6] 6800 3157
## high_debt_versionfalse 7221 2650
## work_experience_programming.s 6539 2911
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality1.all)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -0.0353547679 0.3684315 -0.8732521 0.7112988
## 6033d69a5af2c702367b3a95 -0.1413119416 0.4030653 -1.1485067 0.4797834
## 6033d90a5af2c702367b3a96 -0.0208459511 0.3580224 -0.8071819 0.7554175
## 6034fc165af2c702367b3a98 -0.0269999916 0.3487526 -0.8398003 0.6936706
## 603500725af2c702367b3a99 0.2021893020 0.4548143 -0.4125674 1.4340445
## 603f84f15af2c702367b3a9b -0.0151239606 0.3740174 -0.8867379 0.7698078
## 603f97625af2c702367b3a9d -0.0371455204 0.3654678 -0.8731478 0.7052263
## 603fd5d95af2c702367b3a9e 0.0202960837 0.3851988 -0.8037648 0.9078606
## 60409b7b5af2c702367b3a9f -0.0505209542 0.3595072 -0.8765139 0.6604467
## 604b82b5a7718fbed181b336 0.1825243864 0.4414368 -0.4414388 1.3367323
## 604f1239a7718fbed181b33f 0.0240406509 0.3738950 -0.7110932 0.8656975
## 6050c1bf856f36729d2e5218 -0.0701165447 0.3607500 -0.9657295 0.6263761
## 6050e1e7856f36729d2e5219 -0.0008644719 0.4043399 -0.8647437 0.9203647
## 6055fdc6856f36729d2e521b -0.0084168631 0.3825387 -0.8508590 0.8129208
## 60579f2a856f36729d2e521e -0.1296517372 0.4157756 -1.2720045 0.5634484
## 60589862856f36729d2e521f -0.0239353940 0.3939330 -0.9714840 0.7992649
## 605a30a7856f36729d2e5221 0.0883137766 0.4116627 -0.6470882 1.1351385
## 605afa3a856f36729d2e5222 0.0814823091 0.3751161 -0.6441072 1.0178170
## 605c8bc6856f36729d2e5223 -0.1084673326 0.3874249 -1.1325378 0.5577435
## 605f3f2d856f36729d2e5224 0.0069754774 0.3992166 -0.8569874 0.8802913
## 605f46c3856f36729d2e5225 0.0466065940 0.3597900 -0.6708573 0.8918658
## 60605337856f36729d2e5226 -0.0736029457 0.3553653 -0.9764340 0.5885521
## 60609ae6856f36729d2e5228 -0.0196961625 0.3582248 -0.8687659 0.7400521
## 6061ce91856f36729d2e522e -0.0049975606 0.3668825 -0.8133722 0.7892938
## 6061f106856f36729d2e5231 0.1791821574 0.4288701 -0.4504743 1.3685380
## 60672faa856f36729d2e523c -0.0674552639 0.4014980 -1.0374045 0.7153763
## 6068ea9f856f36729d2e523e -0.0442986076 0.3752380 -0.9644828 0.7221388
## 606db69d856f36729d2e5243 -0.0492975880 0.3868036 -0.9862926 0.7298654
## 6075ab05856f36729d2e5247 0.0525727024 0.3551333 -0.6528852 0.9146007
plot(scenario_quality1.all, ask = FALSE)
pp_check(scenario_quality1.all, nsamples = 200, type = "bars")
This means that our final model, with all data points and experience predictors, is scenario_quality1.all
To begin interpreting the model we look at how it’s parameters were estimated. As our research is focused on how the outcome of the model is effected we will mainly analyze the \(\beta\) parameters.
mcmc_areas(scenario_quality1.all, pars = c("b_high_debt_versionfalse", "b_work_experience_programming.s"), prob = 0.95) + scale_y_discrete() +
scale_y_discrete(labels=c("High debt version: false", "Professional programming experience")) +
ggtitle("Beta parameters densities in scenario quality model", subtitle = "Shaded region marks 95% of the density. Line marks the median")
scale_programming_experience <- function(x) {
(x - mean(d.completed$work_experience_programming))/ sd(d.completed$work_experience_programming)
}
unscale_programming_experience <- function(x) {
x * sd(d.completed$work_experience_programming) + mean(d.completed$work_experience_programming)
}
post_settings <- expand.grid(
high_debt_version = c("false", "true"),
session = NA,
work_experience_programming.s = sapply(c(0, 3, 10, 25, 40), scale_programming_experience)
)
post <- posterior_predict(scenario_quality1.all, newdata = post_settings) %>%
melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
left_join(
rowid_to_column(post_settings, var= "settings_id"),
by = "settings_id"
) %>%
mutate(work_experience_programming = unscale_programming_experience(work_experience_programming.s)) %>%
select(
estimate,
high_debt_version,
work_experience_programming
)%>%
mutate(estimate = estimate)
post.nice <- post %>% mutate_at("estimate", function(x) revalue(as.ordered(x), c(
"1"="Very Bad",
"2"="Bad",
"3"="Somewhat Bad",
"4"="Neutral",
"5"="Somewhat Good",
"6"="Good",
"7"="Very Good"
)))
post.nice$high_debt_version <- revalue(post.nice$high_debt_version, c(
"true"="High Debt",
"false"="Low Debt"
))
post.nice.3 <- filter(post.nice, work_experience_programming == 3)
vline.data.3 <- post.nice.3 %>%
group_by(high_debt_version) %>%
summarize(z = mean(as.numeric(estimate)))
sprintf("Estimations for 3 years experience")
## [1] "Estimations for 3 years experience"
post.nice.3 %>%
ggplot() +
geom_histogram(aes(x=estimate),fill= "Light Blue", stat = "count") +
geom_vline(aes(xintercept = z),
vline.data.3,
col = "Dark Blue",
lwd = 1)+
facet_grid(rows = vars(high_debt_version)) +
scale_y_continuous(limits = NULL, breaks = c(400,800,1200,1600), labels = c("10%","20%","30%","40%")) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank())
post.nice.25 <- filter(post.nice, work_experience_programming == 25)
vline.data.25 <- post.nice.25 %>%
group_by(high_debt_version) %>%
summarize(z = mean(as.numeric(estimate)))
sprintf("Estimations for 25 years experience")
## [1] "Estimations for 25 years experience"
post.nice.25 %>%
ggplot() +
geom_histogram(aes(x=estimate),fill= "Light Blue", stat = "count") +
geom_vline(aes(xintercept = z),
vline.data.25,
col = "Dark Blue",
lwd = 1)+
facet_grid(rows = vars(high_debt_version)) +
scale_y_continuous(limits = NULL, breaks = c(400,800,1200,1600), labels = c("10%","20%","30%","40%")) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank())
post.diff <- post %>% filter(high_debt_version == "true")
post.diff$estimate = post.diff$estimate - filter(post, high_debt_version == "false")$estimate
post.diff %>%
ggplot(aes(x=estimate)) +
geom_boxplot(quantile_lines = TRUE, quantile_fun = hdi, vline_linetype = 2) +
facet_grid(rows = vars(work_experience_programming)) +
labs(
title = "Scenario rating diff / years of programming experience",
subtitle = "Difference as: high debt rating - low debt rating",
x = "Rating difference"
) +
scale_y_continuous(breaks = NULL)
We can then proceed to calculate some likelihoods:
post.diff.10 <- post.diff %>% filter(work_experience_programming == 10)
high_debt_rated_higher <- sum(post.diff.10$estimate > 0)
low_debt_rated_higher <- sum(post.diff.10$estimate < 0)
x <- low_debt_rated_higher / high_debt_rated_higher
x
## [1] 2.970238
Participants with 10 years of professional programming experience were 197% more likely to rate the high debt version scenario as worse than then they were to rate the low debt version scenario as worse.